
# install.packages("foreign")
# install.packages("tm")
# install.packages("SnowballC")
# install.packages("wordcloud")
# install.packages("NLP")
# install.packages("RColorBrewer")
# install.packages("SnowballC")

library(foreign)
library(tm)
library(SnowballC)
library(wordcloud)
library(RColorBrewer)

# rm(list=ls())
# setwd("/Users/amaliejensen/Dropbox/Cities/AJ_files/PSRM Final/Data")

# Show the working directory: the relative "Data/..." paths below are
# resolved against it.
getwd()

### Pooled data
# Read the tab-delimited file holding the Q63 free-text answers.
data <- read.delim("Data/q63_clean.txt")
# Fail early if the text column is absent: `data$Q63` would be NULL and the
# problem would only surface further down the pipeline.
stopifnot("Q63" %in% names(data))
# `[[` avoids the partial name matching of `$`; as.character() guards against
# the column having been read as a factor (the default before R 4.0).
subdataCorpus <- Corpus(VectorSource(as.character(data[["Q63"]])))
# If you want to check a specific line: subdataCorpus[["168"]][["content"]]
subdataCorpus <- tm_map(subdataCorpus, content_transformer(tolower))
# Stop words must be removed BEFORE punctuation. The English stop-word list
# contains apostrophe forms ("don't", "i'm", ...); stripping punctuation first
# turns them into "dont", "im", ... which no longer match the list and would
# end up in the word counts.
subdataCorpus <- tm_map(subdataCorpus, removeWords, stopwords("english"))
subdataCorpus <- tm_map(subdataCorpus, removePunctuation)

# Word table dataset
# Terms are rows and documents are columns, so the row sums give the total
# frequency of each term across all answers.
dtm <- TermDocumentMatrix(subdataCorpus)
m <- as.matrix(dtm)
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)
# Preview the ten most frequent words.
head(d, 10)
# Export the frequency table in Stata format.
write.dta(d, "Data/pooled_wordcloud_q63.dta")






